package com.wxx.util;

import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.PlainText;

import java.io.Closeable;
import java.io.IOException;

/**
 * Downloader that renders pages in a real browser through Selenium. Only Chrome is
 * supported at the moment.<br>
 * A Selenium driver for the browser must be installed for this to work.<br>
 *
 * <p>Every instance of this class drives the same static {@link ChromeDriver}. Because a
 * single browser session can only show one page at a time, navigation and DOM extraction
 * are serialized on an internal lock; concurrent calls to
 * {@link #download(Request, Task)} therefore run one after another.
 */
public class SeleniumDownloader implements Downloader, Closeable {
    static {
        // Keep this block above the webDriver field: static initializers run in textual
        // order, so the driver configuration is applied before the browser is started.
        // NOTE(review): presumably this points Selenium at the chromedriver binary --
        // confirm in CrawlerUtil.
        CrawlerUtil.configChromeDriver();
    }

    /** Browser session shared by all instances; started when this class is initialized. */
    public static ChromeDriver webDriver = new ChromeDriver();

    /**
     * Guards every use of {@link #webDriver} made by this class. Static because the driver
     * itself is static and shared across instances.
     */
    private static final Object DRIVER_LOCK = new Object();

    /**
     * Quits the shared browser session. Waits for a download that is currently in
     * progress to finish before quitting.
     *
     * @throws IOException declared by {@link Closeable}; not thrown by this implementation
     */
    @Override
    public void close() throws IOException {
        synchronized (DRIVER_LOCK) {
            webDriver.quit();
        }
    }

    /**
     * Navigates the shared browser to the request URL and returns the rendered document.
     *
     * <p>Navigation and DOM extraction happen under one lock so that another thread cannot
     * navigate the shared browser away between the two steps, which would attach the wrong
     * content to this request.
     *
     * @param request the request whose URL is loaded
     * @param task    the crawl task; not used by this implementation
     * @return a page holding the rendered HTML of the requested URL
     */
    @Override
    public Page download(Request request, Task task) {
        synchronized (DRIVER_LOCK) {
            webDriver.get(request.getUrl());
            return htmlToPage(request);
        }
    }

    /**
     * Does nothing: there is a single shared browser session, so the requested thread
     * count has no effect on this downloader.
     *
     * @param threadNum ignored
     */
    @Override
    public void setThread(int threadNum) {

    }

    /**
     * Builds a {@link Page} from whatever document the shared browser currently shows.
     *
     * <p>Reads the {@code outerHTML} of the root {@code /html} element and stores it as the
     * page's raw text and parsed HTML, together with the request and its URL.
     * {@link #download(Request, Task)} calls this while holding the driver lock.
     *
     * @param request the request the current browser document belongs to
     * @return the populated page
     */
    protected Page htmlToPage(Request request) {
        WebElement webElement = webDriver.findElement(By.xpath("/html"));
        String content = webElement.getAttribute("outerHTML");
        Page page = new Page();
        page.setRawText(content);
        page.setHtml(new Html(content, request.getUrl()));
        page.setUrl(new PlainText(request.getUrl()));
        page.setRequest(request);
        return page;
    }
}
